This linear regression was fitted using the ordinary least squares (OLS) method and the statsmodels package.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import statsmodels.api as sm
This code imports the data and displays the first five rows of the dataset.
b = pd.read_csv('C:/Users/Kelly Nickelson/Desktop/UniversityTuition.csv')
b.head()
| | Year | Tuition |
|---|---|---|
| 0 | 2019 | 9349 |
| 1 | 2018 | 9212 |
| 2 | 2017 | 9036 |
| 3 | 2016 | 8804 |
| 4 | 2015 | 8778 |
The simple linear regression coefficients are estimated using the least squares criterion: we find the line that minimizes the sum of squared residuals, also called the sum of squared errors. This code estimates the model coefficients for the college tuition data.
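As a sketch of what "minimizes the sum of squared residuals" means, the code below computes the sum of squared residuals (SSR) for the least-squares line on a small made-up dataset (these `x`/`y` values are illustrative, not the tuition data) and checks that perturbing the line only makes the SSR worse.

```python
import numpy as np

# Small hypothetical data (not the tuition dataset)
x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([2.1, 3.9, 6.2, 8.1, 9.8])

def ssr(slope, intercept):
    """Sum of squared residuals for the line y = slope*x + intercept."""
    residuals = y - (slope * x + intercept)
    return np.sum(residuals ** 2)

# Least-squares slope and intercept from a degree-1 polynomial fit
m_hat, b_hat = np.polyfit(x, y, 1)

# The least-squares line has a smaller SSR than any perturbed line
print(ssr(m_hat, b_hat) <= ssr(m_hat + 0.1, b_hat))  # True
print(ssr(m_hat, b_hat) <= ssr(m_hat, b_hat + 5.0))  # True
```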
X = b['Year']
Y = b['Tuition']
# fit OLS; note that no constant column is added, so the model has no intercept
model = sm.OLS(Y, X).fit()
predictions = model.predict(X)
print_model = model.summary()
print(print_model)
                               OLS Regression Results
=======================================================================================
Dep. Variable:                Tuition   R-squared (uncentered):              0.573
Model:                            OLS   Adj. R-squared (uncentered):         0.565
Method:                 Least Squares   F-statistic:                         71.04
Date:                Wed, 09 Mar 2022   Prob (F-statistic):               2.33e-11
Time:                        19:27:18   Log-Likelihood:                    -507.88
No. Observations:                  54   AIC:                                 1018.
Df Residuals:                      53   BIC:                                 1020.
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Year           1.7083      0.203      8.428      0.000       1.302       2.115
==============================================================================
Omnibus:                        8.363   Durbin-Watson:                   0.005
Prob(Omnibus):                  0.015   Jarque-Bera (JB):                6.765
Skew:                           0.759   Prob(JB):                       0.0340
Kurtosis:                       2.161   Cond. No.                         1.00
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
The R-squared shown above is possibly the most important measurement in the summary table. It indicates that our model explains about 57.3% of the variation in the 'Tuition' variable. Because the model was fitted without a constant term, this is the uncentered R-squared (see note [1] in the summary).
The Prob (F-statistic) is the p-value for the null hypothesis, i.e. the likelihood of seeing a relationship this strong if year's true effect on tuition were 0. For our model it is about 2.33e-11, an essentially negligible chance.
This code creates a basic scatterplot of our data and then adds the linear regression line to the plot.
#create basic scatterplot
plt.plot(X, Y, 'o')
plt.title("Average Public 4-Year Institutions Tuition")
plt.xlabel("Year")
plt.ylabel("Public 4-Year Tuition ($)")
#obtain m (slope) and b (intercept) of linear regression line
#(note: this reuses the name b, which previously held the DataFrame)
m, b = np.polyfit(X, Y, 1)
#add linear regression line to scatterplot
plt.plot(X, m*X + b)
plt.show()
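The slope and intercept returned by `np.polyfit` can also be used to evaluate the fitted line at any year. The sketch below fits the same degree-1 line on just the five rows shown in `head()` above (so its coefficients will differ from the full 54-row fit) and evaluates it at an observed year and at a future one; values outside the observed range are extrapolations and should be treated cautiously.

```python
import numpy as np

# The five rows displayed by head() above (a subset, not the full dataset)
x = np.array([2015.0, 2016.0, 2017.0, 2018.0, 2019.0])
y = np.array([8778.0, 8804.0, 9036.0, 9212.0, 9349.0])

# Degree-1 least-squares fit, as in the plotting code
m, b = np.polyfit(x, y, 1)

# Evaluate the fitted line at an observed year and project one year ahead
fitted_2019 = m * 2019 + b
projected_2020 = m * 2020 + b
print(fitted_2019, projected_2020)
```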